1. IMPORT ALL LIBRARIES
InΒ [Β ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")
2. LOAD THE DATASET
InΒ [Β ]:
data = pd.read_excel('../Data/Bidru Awana Final.xlsx')
data.head()
Out[Β ]:
| Year | Station | Day | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1995 | Bidru Awana | 1.0 | 0.235 | 0.223 | 0.223 | 0.248 | 0.331 | 0.26 | 0.260 | 0.547 | 0.413 | 0.362 | 0.109 | 0.082 |
| 1 | 1995 | Bidru Awana | 2.0 | 0.235 | 0.223 | 0.223 | 0.248 | 0.331 | 0.301 | 0.260 | 4.351 | 0.59 | 0.331 | 0.109 | 0.082 |
| 2 | 1995 | Bidru Awana | 3.0 | 0.235 | 0.223 | 0.223 | 0.223 | 0.301 | 0.274 | 0.248 | 1.516 | 0.683 | 0.316 | 0.109 | 0.082 |
| 3 | 1995 | Bidru Awana | 4.0 | 0.235 | 0.223 | 0.223 | 0.212 | 0.301 | 0.26 | 0.248 | 1.436 | 2.582 | 0.301 | 0.109 | 0.082 |
| 4 | 1995 | Bidru Awana | 5.0 | 0.235 | 0.223 | 0.223 | 0.212 | 1.683 | 0.26 | 0.248 | 2.152 | 3.193 | 0.301 | 0.101 | 0.082 |
InΒ [Β ]:
# Labels (found in the "Station " column) of the summary rows to retain
stations_to_keep = ["Mean", "Flow (MCM)", "Maximum", "Minimum", "Runoff (mm)"]
# Build a boolean mask first, then select the matching rows
is_summary_row = data["Station "].isin(stations_to_keep)
filtered_df = data[is_summary_row]
filtered_df.tail()
Out[Β ]:
| Year | Station | Day | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 967 | 2021 | Mean | NaN | 0.703 | 0.415 | 0.415 | 0.725 | 0.260 | 0.324 | 1.789 | 1.770 | 0.549 | 0.384 | 0.246 | 0.197 |
| 968 | 2021 | Flow (MCM) | NaN | 1.884 | 1.112 | 1.112 | 1.879 | 0.696 | 0.84 | 4.793 | 4.741 | 1.423 | 1.027 | 0.637 | 0.526 |
| 969 | 2021 | Maximum | NaN | 0.842 | 0.508 | 0.508 | 1.033 | 0.858 | 4.483 | 9.308 | 8.756 | 2.34 | 1.127 | 0.281 | 0.255 |
| 970 | 2021 | Minimum | NaN | 0.501 | 0.326 | 0.326 | 0.484 | 0.095 | 0.109 | 0.177 | 0.368 | 0.323 | 0.243 | 0.209 | 0.177 |
| 971 | 2021 | Runoff (mm) | NaN | 3.582 | 2.114 | 2.114 | 3.572 | 1.972 | 2.479 | 6.897 | 11.641 | 4.716 | 5.058 | 5.533 | 2.838 |
InΒ [Β ]:
# The "Station " column holds the statistic name for these rows, so call it
# "Parameter"; the "Day" column is dropped.
filtered_df = (
    filtered_df
    .rename(columns={'Station ': 'Parameter'})
    .drop(columns=['Day'])
)
filtered_df.head()
Out[Β ]:
| Year | Parameter | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31 | 1995 | Mean | 0.224 | 0.223 | 0.227 | 0.309 | 0.341 | 0.289 | 0.437 | 0.788 | 1 | 0.159 | 0.096 | 0.075 |
| 32 | 1995 | Flow (MCM) | 0.601 | 0.539 | 0.609 | 0.802 | 0.914 | 0.749 | 1.171 | 2.112 | 2.591 | 0.426 | 0.25 | 0.201 |
| 33 | 1995 | Maximum | 0.235 | 0.223 | 0.260 | 0.569 | 1.683 | 0.59 | 0.838 | 2.351 | 2.217 | 0.362 | 0.109 | 0.082 |
| 34 | 1995 | Minimum | 0.212 | 0.223 | 0.212 | 0.2 | 0.235 | 0.248 | 0.248 | 0.346 | 0.316 | 0.109 | 0.082 | 0.070 |
| 35 | 1995 | Runoff (mm) | 1.658 | 1.158 | 1.861 | 1.554 | 2.298 | 1.266 | 2.556 | 1.507 | 2.198 | 1.387 | 1.088 | 2.893 |
3. TRANSFORM THE DATASET
InΒ [Β ]:
# Remove extra spaces
filtered_df.columns = filtered_df.columns.str.strip()
filtered_df.head()
Out[Β ]:
| Year | Parameter | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31 | 1995 | Mean | 0.224 | 0.223 | 0.227 | 0.309 | 0.341 | 0.289 | 0.437 | 0.788 | 1 | 0.159 | 0.096 | 0.075 |
| 32 | 1995 | Flow (MCM) | 0.601 | 0.539 | 0.609 | 0.802 | 0.914 | 0.749 | 1.171 | 2.112 | 2.591 | 0.426 | 0.25 | 0.201 |
| 33 | 1995 | Maximum | 0.235 | 0.223 | 0.260 | 0.569 | 1.683 | 0.59 | 0.838 | 2.351 | 2.217 | 0.362 | 0.109 | 0.082 |
| 34 | 1995 | Minimum | 0.212 | 0.223 | 0.212 | 0.2 | 0.235 | 0.248 | 0.248 | 0.346 | 0.316 | 0.109 | 0.082 | 0.070 |
| 35 | 1995 | Runoff (mm) | 1.658 | 1.158 | 1.861 | 1.554 | 2.298 | 1.266 | 2.556 | 1.507 | 2.198 | 1.387 | 1.088 | 2.893 |
InΒ [Β ]:
filtered_df.columns
Out[Β ]:
Index(['Year', 'Parameter', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
dtype='object')
InΒ [Β ]:
def transform_data(df):
    """Reshape the wide year-by-month table into a long monthly series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Parameter`` and ``Year`` columns plus one column per
        month named ``Jan`` ... ``Dec``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Parameter``, ``Date`` (first day of the month) and
        ``Value``; one row per parameter/year/month, in the order produced
        by ``melt`` (all January rows first, then February, ...).
    """
    month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    # Month abbreviation -> zero-padded month number ("Jan" -> "01").
    month_numbers = {
        name: f"{position:02d}"
        for position, name in enumerate(month_names, start=1)
    }

    # Wide -> long: one row per (Parameter, Year, Month).
    long_df = pd.melt(
        df,
        id_vars=["Parameter", "Year"],
        value_vars=month_names,
        var_name="Month",
        value_name="Value",
    )
    long_df["Month"] = long_df["Month"].map(month_numbers)

    # Assemble "YYYY-MM-01" strings and parse them into timestamps.
    date_text = long_df["Year"].astype(str) + "-" + long_df["Month"] + "-01"
    long_df["Date"] = pd.to_datetime(date_text)

    return long_df[["Parameter", "Date", "Value"]]
InΒ [Β ]:
# Transform the data
transformed_data = transform_data(filtered_df)
InΒ [Β ]:
transformed_data.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Mean | 1995-01-01 | 0.224 |
| 1 | Flow (MCM) | 1995-01-01 | 0.601 |
| 2 | Maximum | 1995-01-01 | 0.235 |
| 3 | Minimum | 1995-01-01 | 0.212 |
| 4 | Runoff (mm) | 1995-01-01 | 1.658 |
InΒ [Β ]:
# UNIQUE VALUES ONLY
unique_parameters = transformed_data['Parameter'].unique()
InΒ [Β ]:
# Report every row whose Value is not a plain Python int/float
for index, row in transformed_data.iterrows():
    value = row['Value']
    if isinstance(value, (int, float)):
        continue
    print(f"Index: {index}, Date: {row['Date']}, Value: {value}")
InΒ [Β ]:
transformed_data.Value.isnull().sum()
Out[Β ]:
0
InΒ [Β ]:
# Convert 'Value' column to float
transformed_data['Value'] = transformed_data['Value'].astype(float)
InΒ [Β ]:
# RENAME THE PARAMETERS
# Key the mapping by the raw labels themselves rather than by their position
# in unique(): a positional mapping silently mislabels the series if the row
# order of the input ever changes.
rename_dict = {
    'Mean': 'Mean Flow',
    'Flow (MCM)': 'FLow',
    'Maximum': 'Max FLow',
    'Minimum': 'Min Flow',
    'Runoff (mm)': 'Runoff'
}
# Renaming the parameters
transformed_data['Parameter'] = transformed_data['Parameter'].replace(rename_dict)
transformed_data.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Mean Flow | 1995-01-01 | 0.224 |
| 1 | FLow | 1995-01-01 | 0.601 |
| 2 | Max FLow | 1995-01-01 | 0.235 |
| 3 | Min Flow | 1995-01-01 | 0.212 |
| 4 | Runoff | 1995-01-01 | 1.658 |
InΒ [Β ]:
# Parameter counts
transformed_data.Parameter.value_counts()
Out[Β ]:
Parameter Mean Flow 324 FLow 324 Max FLow 324 Min Flow 324 Runoff 324 Name: count, dtype: int64
4. PLOT VALUES OF EACH PARAMETER
InΒ [Β ]:
parameters = transformed_data['Parameter'].unique()
# One fixed colour per parameter, assigned by position in `parameters`
custom_colors = {
    parameters[0]: '#2ca02c',
    parameters[1]: '#8b0000',
    parameters[2]: '#ff6347',
    parameters[3]: '#8c564b',
    parameters[4]: '#ff7f0e',
}

plt.figure(figsize=(40, 10))
for parameter in parameters:
    # Rows of the current parameter, indexed by date for plotting
    belongs_to_parameter = transformed_data['Parameter'] == parameter
    parameter_data = transformed_data[belongs_to_parameter]
    parameter_data.set_index('Date', inplace=True)
    sns.lineplot(
        data=parameter_data,
        x=parameter_data.index,
        y='Value',
        marker='o',
        label=parameter,
        linewidth=1,
        color=custom_colors[parameter],
    )

# plt.title('Monthly Analysis', fontsize=24)
plt.xlabel('Date', fontsize=24)
plt.ylabel('Streamflow(m3/s)', fontsize=26)
plt.legend(loc='best', fontsize=24, framealpha=0.5)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.show()
InΒ [Β ]:
parameters = transformed_data['Parameter'].unique()
# Only the third and fourth parameters are drawn in this figure
custom_colors = {
    parameters[2]: '#ff6347',
    parameters[3]: '#8c564b',
}

plt.figure(figsize=(40, 20))
for parameter in parameters[2:4]:
    # Rows of the current parameter, indexed by date for plotting
    belongs_to_parameter = transformed_data['Parameter'] == parameter
    parameter_data = transformed_data[belongs_to_parameter]
    parameter_data.set_index('Date', inplace=True)
    sns.lineplot(
        data=parameter_data,
        x=parameter_data.index,
        y='Value',
        marker='o',
        label=parameter,
        linewidth=1,
        color=custom_colors[parameter],
    )

# plt.title('Monthly Analysis', fontsize=24)
plt.xlabel('Date', fontsize=24)
plt.ylabel('Streamflow(m3/s)', fontsize=26)
plt.legend(loc='best', fontsize=24, framealpha=0.5)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.show()
InΒ [Β ]:
parameters = transformed_data['Parameter'].unique()
parameters
Out[Β ]:
array(['Mean Flow', 'FLow', 'Max FLow', 'Min Flow', 'Runoff'],
dtype=object)
5. FIND THE CORRELATIONS BETWEEN THE PARAMETERS
InΒ [Β ]:
# Keep a single row per (Date, Parameter) pair before pivoting; with the
# default settings drop_duplicates retains the first occurrence.
transformed_data = transformed_data.drop_duplicates(subset=['Date', 'Parameter'])
# Pivot the data: one row per Date, one column per Parameter
pivot_data = transformed_data.pivot(index='Date', columns='Parameter', values='Value')
# Calculate the correlation matrix between the parameter columns
correlation_matrix = pivot_data.corr()
# Plot the heatmap, annotating every cell with its coefficient
plt.figure(figsize=(16, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', square=True, linewidths=0.5, annot_kws={"size": 18})
# Adjust x-tick and y-tick labels rotation
plt.xticks(rotation=90, fontsize=20)
plt.yticks(rotation=0, fontsize=20)
# Blank out the axis titles (the tick labels already name the parameters)
plt.xlabel('')
plt.ylabel('')
plt.show()
6. COMMON FUNCTIONS
TIME SERIES TO SUPERVISED
InΒ [Β ]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a table of lagged and current observations.

    Parameters
    ----------
    data : list or array-like
        Observations in time order; one row per step. One- and
        two-dimensional inputs are both accepted.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) placed before the current one.
    n_out : int
        Number of steps from the current one onwards (t ... t+n_out-1).
    dropnan : bool
        Drop the rows made incomplete by shifting.

    Returns
    -------
    numpy.ndarray
        Columns ordered oldest lag first, then the current and future steps.
    """
    # NOTE: the former `n_vars` local was never used, and computing it via
    # data.shape[1] raised IndexError for 1-D arrays; DataFrame() already
    # copes with lists, 1-D and 2-D input.
    df = pd.DataFrame(data)
    lagged = [df.shift(lag) for lag in range(n_in, 0, -1)]
    ahead = [df.shift(-step) for step in range(n_out)]
    agg = pd.concat(lagged + ahead, axis=1)
    if dropnan:
        agg = agg.dropna()
    return agg.values
ADD ROLLING FEATURES
InΒ [Β ]:
def add_rolling_features(data, window=3):
    """Append rolling mean and standard deviation of the first column.

    Parameters
    ----------
    data : array-like
        Anything ``pandas.DataFrame`` accepts; the statistics are computed
        over its first column.
    window : int
        Number of consecutive rows in each rolling window.

    Returns
    -------
    pandas.DataFrame
        The input columns plus ``rolling_mean`` and ``rolling_std``, with
        every row that contains a NaN removed.
    """
    frame = pd.DataFrame(data)
    first_column_windows = frame.iloc[:, 0].rolling(window=window)
    frame['rolling_mean'] = first_column_windows.mean()
    frame['rolling_std'] = first_column_windows.std()
    return frame.dropna()
TRAIN TEST SPLIT
InΒ [Β ]:
def train_test_split(data, train_size=0.9):
    """Split *data* chronologically into a leading and a trailing part.

    The first ``int(len(data) * train_size)`` items form the training part
    and the remainder the test part; no shuffling takes place.
    """
    cutoff = int(len(data) * train_size)
    leading = data[:cutoff]
    trailing = data[cutoff:]
    return leading, trailing
MEASURING METRICS
InΒ [Β ]:
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Return the Nash-Sutcliffe efficiency of *y_pred* against *y_true*.

    Computed as ``1 - sum((obs - sim)**2) / sum((obs - mean(obs))**2)``.

    Both inputs are converted to float arrays first, so plain lists work and
    pandas objects are compared position by position instead of being
    aligned on their index labels.
    """
    observed = np.asarray(y_true, dtype=float)
    simulated = np.asarray(y_pred, dtype=float)
    residual_ss = np.sum((observed - simulated) ** 2)
    spread_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - (residual_ss / spread_ss)
def willmotts_index(y_true, y_pred):
    """Return Willmott's index of agreement of *y_pred* against *y_true*.

    Computed as
    ``1 - sum((sim - obs)**2) / sum((|sim - mean(obs)| + |obs - mean(obs)|)**2)``.

    Both inputs are converted to float arrays first, so plain lists work and
    pandas objects are compared position by position instead of being
    aligned on their index labels.
    """
    observed = np.asarray(y_true, dtype=float)
    simulated = np.asarray(y_pred, dtype=float)
    observed_mean = np.mean(observed)
    squared_error = np.sum((simulated - observed) ** 2)
    potential_error = np.sum(
        (np.abs(simulated - observed_mean) + np.abs(observed - observed_mean)) ** 2
    )
    return 1 - (squared_error / potential_error)
CROSS VALIDATION AND MODEL EVALUATION
InΒ [Β ]:
def walk_forward_validation(data, model):
    """Score *model* with one-step-ahead walk-forward validation.

    Parameters
    ----------
    data : pandas.DataFrame
        Supervised table whose last column is the target and whose other
        columns are the predictors. It is split with ``train_test_split``
        using that function's default arguments.
    model : callable
        Called as ``model(history, testX)``, where ``history`` is the list of
        rows seen so far; must return a single prediction.

    Returns
    -------
    tuple
        ``(mae, rmse, r2, nse, willmott, test index, observed target,
        predictions)``.
    """
    train, test = train_test_split(data)
    history = list(train.values)
    observed = test.iloc[:, -1]
    predictions = []

    for step in range(len(test)):
        testX = test.iloc[step, :-1].values
        testy = test.iloc[step, -1]
        yhat = model(history, testX)
        predictions.append(yhat)
        # The true row joins the history only after it has been predicted.
        history.append(test.iloc[step].values)
        print('>expected=%.1f, predicted=%.1f' % (testy, yhat))

    mae = mean_absolute_error(observed, predictions)
    squared_error = (observed - np.asarray(predictions)) ** 2
    rmse = np.sqrt(np.mean(squared_error))
    r2 = r2_score(observed, predictions)
    nse = nash_sutcliffe_efficiency(observed, predictions)
    willmott = willmotts_index(observed, predictions)
    return mae, rmse, r2, nse, willmott, test.index, observed, predictions
7. MODELS
1. RANDOM FOREST
InΒ [Β ]:
def random_forest_forecast(train, testX):
    """Fit a 50-tree random forest on *train* and predict one step.

    *train* is a sequence of rows whose last element is the target and whose
    leading elements are the predictors; *testX* is one predictor row. The
    single predicted value is returned.
    """
    history = np.asarray(train)
    predictors = history[:, :-1]
    target = history[:, -1]
    regressor = RandomForestRegressor(n_estimators=50)
    regressor.fit(predictors, target)
    forecast = regressor.predict([testX])
    return forecast[0]
2. SVM
InΒ [Β ]:
from sklearn.svm import SVR
def svm_forecast(train, testX):
    """Fit an RBF-kernel support vector regressor on *train* and predict one step.

    *train* is a sequence of rows whose last element is the target and whose
    leading elements are the predictors; *testX* is one predictor row. The
    single predicted value is returned.
    """
    history = np.asarray(train)
    predictors = history[:, :-1]
    target = history[:, -1]
    regressor = SVR(kernel='rbf')
    regressor.fit(predictors, target)
    forecast = regressor.predict([testX])
    return forecast[0]
3. XGBRegressor
InΒ [Β ]:
from xgboost import XGBRegressor
def xgboost_forecast(train, testX):
    """Fit a 50-estimator XGBoost regressor on *train* and predict one step.

    *train* is a sequence of rows whose last element is the target and whose
    leading elements are the predictors; *testX* is one predictor row. The
    single predicted value is returned.
    """
    history = np.asarray(train)
    predictors = history[:, :-1]
    target = history[:, -1]
    regressor = XGBRegressor(n_estimators=50)
    regressor.fit(predictors, target)
    forecast = regressor.predict([testX])
    return forecast[0]
4. LGBMRegressor
InΒ [Β ]:
from lightgbm import LGBMRegressor
def lightgbm_forecast(train, testX):
    """Fit a 50-estimator LightGBM regressor on *train* and predict one step.

    *train* is a sequence of rows whose last element is the target and whose
    leading elements are the predictors; *testX* is one predictor row. The
    single predicted value is returned. ``verbose=-1`` is passed to the
    estimator.
    """
    history = np.asarray(train)
    predictors = history[:, :-1]
    target = history[:, -1]
    regressor = LGBMRegressor(n_estimators=50, verbose=-1)
    regressor.fit(predictors, target)
    forecast = regressor.predict([testX])
    return forecast[0]
InΒ [Β ]:
# Color dictionary for different models
colors_dict = {
'LightGBM': '#27ad81',
'XGBoost': '#5dc863',
'SVM': '#aadc32',
'Random Forest': '#fde725'
}
6. MEAN FLOW ANALYSIS
InΒ [Β ]:
# Get the mean flow values
mean_flow = transformed_data[transformed_data['Parameter'] == parameters[0]]
mean_flow.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Mean Flow | 1995-01-01 | 0.224000 |
| 5 | Mean Flow | 1996-01-01 | 0.163161 |
| 10 | Mean Flow | 1997-01-01 | 0.181000 |
| 15 | Mean Flow | 1998-01-01 | 0.295000 |
| 20 | Mean Flow | 1999-01-01 | 0.144000 |
InΒ [Β ]:
# drop the parameter columns
mean_flow = mean_flow.drop(columns=['Parameter'])
InΒ [Β ]:
mean_flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1995-01-01 | 0.224000 |
| 5 | 1996-01-01 | 0.163161 |
| 10 | 1997-01-01 | 0.181000 |
| 15 | 1998-01-01 | 0.295000 |
| 20 | 1999-01-01 | 0.144000 |
InΒ [Β ]:
# sort according to the date
mean_flow= mean_flow.sort_values(by='Date')
mean_flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1995-01-01 | 0.224 |
| 135 | 1995-02-01 | 0.223 |
| 270 | 1995-03-01 | 0.227 |
| 405 | 1995-04-01 | 0.309 |
| 540 | 1995-05-01 | 0.341 |
InΒ [Β ]:
mean_flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1995-01-01 | 0.224 |
| 135 | 1995-02-01 | 0.223 |
| 270 | 1995-03-01 | 0.227 |
| 405 | 1995-04-01 | 0.309 |
| 540 | 1995-05-01 | 0.341 |
InΒ [Β ]:
# Find the row with the maximum value
max_value_row = mean_flow.loc[mean_flow['Value'].idxmax()]
# Print the date of that row (the whole Date column used to be printed here)
print(f"Date with the maximum value: {max_value_row['Date']}")
print(f"Maximum value: {max_value_row['Value']}")
Date with the maximum value: 0 1995-01-01
135 1995-02-01
270 1995-03-01
405 1995-04-01
540 1995-05-01
...
1075 2021-08-01
1210 2021-09-01
1345 2021-10-01
1480 2021-11-01
1615 2021-12-01
Name: Date, Length: 324, dtype: datetime64[ns]
Maximum value: 2.829
InΒ [Β ]:
# Line chart of the mean-flow series over time
plt.figure(figsize=(20, 8))
# Draw onto the figure created above by passing its current axes
mean_flow.plot(x='Date', y='Value', kind='line', ax=plt.gca(), color='#2ca02c', fontsize=12)
plt.xlabel('Date', fontdict={'fontsize':20})
plt.ylabel('Mean Flow (m3/s)', fontdict={'fontsize':20})
plt.xticks(size = 20)
plt.yticks(size = 20)
plt.grid(linestyle='--')
plt.tight_layout()
# Single series: remove the legend from the axes
plt.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run ``adfuller`` on *values* and print the leading result fields.

    The first four entries of the result are printed under the labels
    below, followed by a verdict based on whether the second entry (labelled
    p-value) is at most 0.05.
    """
    labels = ['ADF Test Statistic','p-value','#Lags Used', 'Number of Observations Used']
    result = adfuller(values)
    for label, value in zip(labels, result):
        print(label+' : '+str(value))
    rejects_null = result[1] <= 0.05
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if rejects_null
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(mean_flow['Value'])
ADF Test Statistic : -2.4945372082496937 p-value : 0.11680914517324759 #Lags Used : 13 Number of Observations Used : 310 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
mean_flow = mean_flow[['Date', 'Value']]
mean_flow.set_index('Date', inplace=True)
mean_flow.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1995-01-01 | 0.224 |
| 1995-02-01 | 0.223 |
| 1995-03-01 | 0.227 |
| 1995-04-01 | 0.309 |
| 1995-05-01 | 0.341 |
InΒ [Β ]:
model_names = []
mae_values = []
rmse_values = []
r2_values = []
nse_values = []
willmott_values = []
TRAIN THE MODELS
InΒ [Β ]:
# Build the supervised-learning table for the mean-flow series
series = mean_flow
n_lags = 6   # number of past months offered to the models
window = 3   # rolling-statistics window; the first window - 1 rows are dropped
values = series.values
values = add_rolling_features(values, window=window)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split performed in walk_forward_validation — confirm that this
# is acceptable for the study.
scaler = StandardScaler()
values = scaler.fit_transform(values)
n_features = values.shape[1]   # Value, rolling_mean, rolling_std
data = series_to_supervised(values, n_in=n_lags)
# series_to_supervised lays the columns out as
#   [features(t-6), ..., features(t-1), features(t)]
# and walk_forward_validation / the *_forecast helpers use the LAST column as
# the target. Left as-is, the target would be rolling_std(t), while Value(t)
# and rolling_mean(t) would sit among the predictors. Keep only the lagged
# features as predictors and put the current scaled Value last.
lagged_features = data[:, :n_lags * n_features]
current_value = data[:, n_lags * n_features]
data = np.column_stack([lagged_features, current_value])
data_df = pd.DataFrame(data, index=series.index[n_lags + window - 1:])
InΒ [Β ]:
# Model names
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear existing lists in case they contain previous values
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation ONCE per model and keep its predictions, so the
# curves plotted below are exactly the ones the metrics were computed from.
# (Validating a second time for the plot doubled the run time and, for models
# fitted without a fixed seed, drew forecasts that differed from the scored
# ones.)
predictions_by_model = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions_by_model[model_name] = yhat

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

# Plot the stored predictions of each model
for model_name, model_predictions in predictions_by_model.items():
    plt.plot(test_index, model_predictions, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Mean Flow(m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-0.2, predicted=0.0 >expected=3.4, predicted=1.9 >expected=3.2, predicted=2.0 >expected=3.6, predicted=2.0 >expected=-0.0, predicted=-0.0 >expected=-0.7, predicted=-0.5 >expected=-0.6, predicted=-0.8 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.6 >expected=1.1, predicted=2.5 >expected=0.2, predicted=1.6 >expected=1.3, predicted=0.6 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.0 >expected=0.4, predicted=-0.0 >expected=-0.2, predicted=0.4 >expected=-0.2, predicted=0.0 >expected=0.0, predicted=-0.2 >expected=0.1, predicted=0.0 >expected=2.2, predicted=1.9 >expected=2.1, predicted=2.3 >expected=1.7, predicted=1.9 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=3.4, predicted=3.1 >expected=3.2, predicted=2.0 >expected=3.6, predicted=2.2 >expected=-0.0, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.6 >expected=1.1, predicted=2.2 >expected=0.2, predicted=1.6 >expected=1.3, predicted=0.7 >expected=1.4, predicted=0.9 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=0.2 >expected=0.4, predicted=-0.0 >expected=-0.2, predicted=0.1 >expected=-0.2, predicted=0.1 >expected=0.0, predicted=-0.1 >expected=0.1, predicted=0.2 >expected=2.2, predicted=2.2 >expected=2.1, predicted=2.0 >expected=1.7, predicted=1.9 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, 
predicted=-0.1 >expected=3.4, predicted=2.9 >expected=3.2, predicted=2.4 >expected=3.6, predicted=2.6 >expected=-0.0, predicted=0.4 >expected=-0.7, predicted=-0.0 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.4 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.9 >expected=1.1, predicted=1.5 >expected=0.2, predicted=0.7 >expected=1.3, predicted=1.1 >expected=1.4, predicted=1.0 >expected=-0.8, predicted=-0.6 >expected=0.5, predicted=-0.5 >expected=0.4, predicted=-0.3 >expected=-0.2, predicted=0.2 >expected=-0.2, predicted=0.2 >expected=0.0, predicted=-0.1 >expected=0.1, predicted=0.1 >expected=2.2, predicted=1.3 >expected=2.1, predicted=1.8 >expected=1.7, predicted=2.0 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, predicted=-0.2 >expected=3.4, predicted=2.2 >expected=3.2, predicted=1.8 >expected=3.6, predicted=2.4 >expected=-0.0, predicted=-0.0 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.3 >expected=1.1, predicted=2.4 >expected=0.2, predicted=1.2 >expected=1.3, predicted=0.6 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.2 >expected=0.4, predicted=-0.3 >expected=-0.2, predicted=0.5 >expected=-0.2, predicted=0.2 >expected=0.0, predicted=0.0 >expected=0.1, predicted=0.1 >expected=2.2, predicted=1.7 >expected=2.1, predicted=2.1 >expected=1.7, predicted=1.7 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-0.2, predicted=0.0 >expected=3.4, predicted=1.9 >expected=3.2, predicted=2.0 
>expected=3.6, predicted=2.0 >expected=-0.0, predicted=-0.0 >expected=-0.7, predicted=-0.5 >expected=-0.6, predicted=-0.8 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.6 >expected=1.1, predicted=2.5 >expected=0.2, predicted=1.6 >expected=1.3, predicted=0.6 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.0 >expected=0.4, predicted=-0.0 >expected=-0.2, predicted=0.4 >expected=-0.2, predicted=0.0 >expected=0.0, predicted=-0.2 >expected=0.1, predicted=0.0 >expected=2.2, predicted=1.9 >expected=2.1, predicted=2.3 >expected=1.7, predicted=1.9 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=3.4, predicted=3.1 >expected=3.2, predicted=2.0 >expected=3.6, predicted=2.2 >expected=-0.0, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.6 >expected=1.1, predicted=2.2 >expected=0.2, predicted=1.6 >expected=1.3, predicted=0.7 >expected=1.4, predicted=0.9 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=0.2 >expected=0.4, predicted=-0.0 >expected=-0.2, predicted=0.1 >expected=-0.2, predicted=0.1 >expected=0.0, predicted=-0.1 >expected=0.1, predicted=0.2 >expected=2.2, predicted=2.2 >expected=2.1, predicted=2.0 >expected=1.7, predicted=1.9 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.9 >expected=3.2, predicted=2.4 >expected=3.6, predicted=2.6 >expected=-0.0, predicted=0.4 >expected=-0.7, 
predicted=-0.0 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.4 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.2 >expected=1.9, predicted=1.9 >expected=1.1, predicted=1.5 >expected=0.2, predicted=0.7 >expected=1.3, predicted=1.1 >expected=1.4, predicted=1.0 >expected=-0.8, predicted=-0.6 >expected=0.5, predicted=-0.5 >expected=0.4, predicted=-0.3 >expected=-0.2, predicted=0.2 >expected=-0.2, predicted=0.2 >expected=0.0, predicted=-0.1 >expected=0.1, predicted=0.1 >expected=2.2, predicted=1.3 >expected=2.1, predicted=1.8 >expected=1.7, predicted=2.0 >expected=1.8, predicted=1.8 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, predicted=-0.2 >expected=3.4, predicted=2.2 >expected=3.2, predicted=1.8 >expected=3.6, predicted=2.4 >expected=-0.0, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.0, predicted=-0.3 >expected=1.9, predicted=1.6 >expected=1.1, predicted=2.2 >expected=0.2, predicted=1.4 >expected=1.3, predicted=0.7 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.1 >expected=0.4, predicted=-0.3 >expected=-0.2, predicted=0.4 >expected=-0.2, predicted=0.2 >expected=0.0, predicted=0.2 >expected=0.1, predicted=0.2 >expected=2.2, predicted=1.2 >expected=2.1, predicted=2.0 >expected=1.7, predicted=1.7 >expected=1.8, predicted=1.7 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.4
EVALUATE THE MODELS
InΒ [Β ]:
# Create a DataFrame
metrics = {
'Model': model_names,
'MAE': mae_values,
'RMSE': rmse_values,
'R-squared': r2_values,
'Nash-Sutcliffe Efficiency': nse_values,
'Willmott\'s Index of Agreement': willmott_values
}
metrics = pd.DataFrame(metrics)
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.403790 | 0.629322 | 0.768773 | 0.768773 | 0.927839 |
| 1 | XGBoost | 0.302683 | 0.502463 | 0.852598 | 0.852598 | 0.957353 |
| 2 | SVM | 0.343343 | 0.446910 | 0.883390 | 0.883390 | 0.964259 |
| 3 | Random Forest | 0.380600 | 0.584901 | 0.800263 | 0.800263 | 0.937047 |
InΒ [Β ]:
# Render the (rounded) metrics frame as a table figure
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(
    cellText=metrics.values,
    colLabels=metrics.columns,
    cellLoc='center',
    loc='center',
    bbox=[0, 0, 1, 1],
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'
# Cells are keyed by (row, column); row 0 or column -1 gets the header styling
for (row_pos, col_pos), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    is_header = row_pos == 0 or col_pos == -1
    if is_header:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Mean Flow')
plt.show()
InΒ [Β ]:
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metrics and Titles
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Plot each metric separately: one bar chart per metric column
for metric in metrics_title:
    title = metrics_title[metric]  # only needed if the title line is re-enabled
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
    # ax.set_title(title, fontsize=16)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    plt.tight_layout()
    plt.show()
7. FLOW ANALYSIS
InΒ [Β ]:
# Get the flow values (rows whose Parameter equals the second entry of `parameters`)
flow = transformed_data[transformed_data['Parameter'] == parameters[1]]
flow.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 1 | FLow | 1995-01-01 | 0.60100 |
| 6 | FLow | 1996-01-01 | 4.89483 |
| 11 | FLow | 1997-01-01 | 0.48600 |
| 16 | FLow | 1998-01-01 | 0.79000 |
| 21 | FLow | 1999-01-01 | 0.38500 |
InΒ [Β ]:
# drop the parameter columns
flow = flow.drop(columns=['Parameter'])
InΒ [Β ]:
flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 1 | 1995-01-01 | 0.60100 |
| 6 | 1996-01-01 | 4.89483 |
| 11 | 1997-01-01 | 0.48600 |
| 16 | 1998-01-01 | 0.79000 |
| 21 | 1999-01-01 | 0.38500 |
InΒ [Β ]:
# sort according to the date
flow= flow.sort_values(by='Date')
flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 1 | 1995-01-01 | 0.601 |
| 136 | 1995-02-01 | 0.539 |
| 271 | 1995-03-01 | 0.609 |
| 406 | 1995-04-01 | 0.802 |
| 541 | 1995-05-01 | 0.914 |
InΒ [Β ]:
flow.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 1 | 1995-01-01 | 0.601 |
| 136 | 1995-02-01 | 0.539 |
| 271 | 1995-03-01 | 0.609 |
| 406 | 1995-04-01 | 0.802 |
| 541 | 1995-05-01 | 0.914 |
InΒ [Β ]:
# Line chart of the flow series over time
plt.figure(figsize=(20, 8))
# Draw onto the figure created above by passing its current axes
flow.plot(x='Date', y='Value', kind='line', ax=plt.gca(), color='#8b0000', fontsize=12)
plt.xlabel('Date', fontdict={'fontsize':20})
# NOTE(review): this series comes from the rows labelled "Flow (MCM)" in the
# source table, while the axis label says m3/s — confirm the intended unit.
plt.ylabel('Flow (m3/s)', fontdict={'fontsize':20})
plt.xticks(size = 20)
plt.yticks(size = 20)
plt.grid(linestyle='--')
plt.tight_layout()
# Single series: remove the legend from the axes
plt.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run ``adfuller`` on *values* and print the leading result fields.

    The first four entries of the result are printed under the labels
    below, followed by a verdict based on whether the second entry (labelled
    p-value) is at most 0.05.
    """
    labels = ['ADF Test Statistic','p-value','#Lags Used', 'Number of Observations Used']
    result = adfuller(values)
    for label, value in zip(labels, result):
        print(label+' : '+str(value))
    rejects_null = result[1] <= 0.05
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if rejects_null
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(flow['Value'])
ADF Test Statistic : -2.592225967325901 p-value : 0.09461467042325716 #Lags Used : 13 Number of Observations Used : 310 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two modelling columns and index the series by date
flow = flow[['Date', 'Value']].set_index('Date')
flow.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1995-01-01 | 0.601 |
| 1995-02-01 | 0.539 |
| 1995-03-01 | 0.609 |
| 1995-04-01 | 0.802 |
| 1995-05-01 | 0.914 |
InΒ [Β ]:
# Six fresh, independent lists: model names plus one list per metric
(
    flow_model_names,
    flow_mae_values,
    flow_rmse_values,
    flow_r2_values,
    flow_nse_values,
    flow_willmott_values,
) = ([] for _ in range(6))
TRAIN THE MODELS
InΒ [Β ]:
# Build the scaled, lagged design matrix for the flow series.
# `flow` is the date-indexed, single-column DataFrame prepared above.
N_LAGS = 6       # number of past time steps passed to series_to_supervised
ROLL_WINDOW = 3  # window passed to add_rolling_features

series = flow
values = series.values
values = add_rolling_features(values, window=ROLL_WINDOW)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the complete series. If
# walk_forward_validation holds out a test period, its mean/variance have
# already influenced the scaling (data leakage) - confirm and consider
# fitting on the training part only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
data = series_to_supervised(values, n_in=N_LAGS)
# Leading rows without a full feature set are dropped from the index:
# N_LAGS for the lags plus ROLL_WINDOW - 1, presumably removed by
# add_rolling_features (TODO confirm). Equals the former hard-coded 6+2.
data_df = pd.DataFrame(data, index=series.index[N_LAGS + ROLL_WINDOW - 1:])
InΒ [Β ]:
# Candidate models as (display name, forecast function) pairs
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear the shared result lists in case they contain previous values
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and remember its
# predictions. The earlier version re-ran the validation for the plot, which
# doubled the runtime and, for models fitted without a fixed seed, drew
# curves that differed from the predictions the metrics were computed on.
model_predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions[model_name] = yhat

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model.
# `test_index` and `y` come from the last validation run; every run uses the
# same `data_df`, so they are taken to be identical across models.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)
for model_name, yhat in model_predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)
plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Flow (m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.6 >expected=-0.2, predicted=0.0 >expected=3.4, predicted=2.0 >expected=3.2, predicted=1.7 >expected=3.6, predicted=1.9 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=2.0 >expected=1.1, predicted=2.6 >expected=0.2, predicted=1.8 >expected=1.2, predicted=0.7 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.7 >expected=0.5, predicted=0.0 >expected=0.4, predicted=-0.0 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=-0.2 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.8 >expected=2.2, predicted=2.0 >expected=1.7, predicted=1.9 >expected=1.9, predicted=1.7 >expected=-0.3, predicted=-0.1 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.5 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.0 >expected=3.2, predicted=1.9 >expected=3.6, predicted=2.8 >expected=-0.1, predicted=0.0 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=1.7 >expected=1.1, predicted=1.7 >expected=0.2, predicted=0.9 >expected=1.2, predicted=0.6 >expected=1.4, predicted=0.8 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=0.1 >expected=0.4, predicted=-0.2 >expected=-0.3, predicted=0.2 >expected=-0.3, predicted=-0.1 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=-0.2 >expected=2.2, predicted=1.5 >expected=2.2, predicted=2.2 >expected=1.7, predicted=1.7 >expected=1.9, predicted=1.9 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.5 
>expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.9 >expected=3.2, predicted=2.4 >expected=3.6, predicted=2.4 >expected=-0.1, predicted=0.3 >expected=-0.7, predicted=-0.0 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.4 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=2.0 >expected=1.1, predicted=1.5 >expected=0.2, predicted=0.8 >expected=1.2, predicted=1.0 >expected=1.4, predicted=0.9 >expected=-0.8, predicted=-0.6 >expected=0.5, predicted=-0.5 >expected=0.4, predicted=0.0 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=0.2 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.4 >expected=2.2, predicted=1.5 >expected=1.7, predicted=1.8 >expected=1.9, predicted=1.6 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.0 >expected=3.2, predicted=1.9 >expected=3.6, predicted=2.3 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.3 >expected=1.9, predicted=1.9 >expected=1.1, predicted=1.7 >expected=0.2, predicted=1.0 >expected=1.2, predicted=0.6 >expected=1.4, predicted=0.8 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.2 >expected=0.4, predicted=-0.3 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=0.3 >expected=-0.0, predicted=0.0 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.5 >expected=2.2, predicted=1.9 >expected=1.7, predicted=1.8 >expected=1.9, predicted=1.7 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.6 >expected=-0.2, predicted=0.0 >expected=3.4, predicted=2.0 >expected=3.2, 
predicted=1.7 >expected=3.6, predicted=1.9 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=2.0 >expected=1.1, predicted=2.6 >expected=0.2, predicted=1.8 >expected=1.2, predicted=0.7 >expected=1.4, predicted=0.7 >expected=-0.8, predicted=-0.7 >expected=0.5, predicted=0.0 >expected=0.4, predicted=-0.0 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=-0.2 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.8 >expected=2.2, predicted=2.0 >expected=1.7, predicted=1.9 >expected=1.9, predicted=1.7 >expected=-0.3, predicted=-0.1 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.5 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.0 >expected=3.2, predicted=1.9 >expected=3.6, predicted=2.8 >expected=-0.1, predicted=0.0 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=1.7 >expected=1.1, predicted=1.7 >expected=0.2, predicted=0.9 >expected=1.2, predicted=0.6 >expected=1.4, predicted=0.8 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=0.1 >expected=0.4, predicted=-0.2 >expected=-0.3, predicted=0.2 >expected=-0.3, predicted=-0.1 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=-0.2 >expected=2.2, predicted=1.5 >expected=2.2, predicted=2.2 >expected=1.7, predicted=1.7 >expected=1.9, predicted=1.9 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-0.7 >expected=-0.4, predicted=-0.5 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.9 >expected=3.2, predicted=2.4 >expected=3.6, predicted=2.4 >expected=-0.1, predicted=0.3 
>expected=-0.7, predicted=-0.0 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.4 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.2 >expected=1.9, predicted=2.0 >expected=1.1, predicted=1.5 >expected=0.2, predicted=0.8 >expected=1.2, predicted=1.0 >expected=1.4, predicted=0.9 >expected=-0.8, predicted=-0.6 >expected=0.5, predicted=-0.5 >expected=0.4, predicted=0.0 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=0.2 >expected=-0.0, predicted=-0.2 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.4 >expected=2.2, predicted=1.5 >expected=1.7, predicted=1.8 >expected=1.9, predicted=1.6 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=-0.2, predicted=-0.1 >expected=3.4, predicted=2.0 >expected=3.2, predicted=1.9 >expected=3.6, predicted=2.1 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.0, predicted=-0.3 >expected=1.9, predicted=1.9 >expected=1.1, predicted=1.9 >expected=0.2, predicted=1.2 >expected=1.2, predicted=0.5 >expected=1.4, predicted=0.8 >expected=-0.8, predicted=-0.8 >expected=0.5, predicted=-0.2 >expected=0.4, predicted=-0.2 >expected=-0.3, predicted=0.4 >expected=-0.3, predicted=0.2 >expected=-0.0, predicted=0.2 >expected=0.0, predicted=0.1 >expected=2.2, predicted=1.5 >expected=2.2, predicted=1.9 >expected=1.7, predicted=1.8 >expected=1.9, predicted=1.6 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.5
EVALUATE THE MODEL
InΒ [Β ]:
# Collect the per-model scores into one table, one row per model
metrics = pd.DataFrame(
    {
        'Model': model_names,
        'MAE': mae_values,
        'RMSE': rmse_values,
        'R-squared': r2_values,
        'Nash-Sutcliffe Efficiency': nse_values,
        'Willmott\'s Index of Agreement': willmott_values,
    }
)
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.409872 | 0.660180 | 0.751504 | 0.751504 | 0.921768 |
| 1 | XGBoost | 0.308311 | 0.478650 | 0.869374 | 0.869374 | 0.959671 |
| 2 | SVM | 0.366457 | 0.478225 | 0.869605 | 0.869605 | 0.958244 |
| 3 | Random Forest | 0.359409 | 0.548506 | 0.828463 | 0.828463 | 0.944408 |
InΒ [Β ]:
# Render the metrics as a figure-based table.
# Note: `metrics` itself is replaced by its 2-decimal rounding, so later
# cells that read `metrics` see the rounded values.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping;
# it replaces direct access to the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Flow')
plt.show()
InΒ [Β ]:
# Bar charts of the flow results: one figure per metric, one bar per model.
# `metrics` was rounded to 2 decimals by the table cell above, so the bars
# show the rounded values.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4
# Metrics and Titles: column name in `metrics` -> longer display title
metrics_title = {
'MAE': 'Mean Absolute Error (MAE)',
'RMSE': 'Root Mean Squared Error (RMSE)',
'R-squared': 'R-squared',
'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}
# Plot each metric separately
# NOTE(review): `title` is only used by the commented-out set_title call
# below, so the display titles are currently not shown anywhere.
for metric, title in metrics_title.items():
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
# ax.set_title(title, fontsize=16)
ax.set_ylabel(metric, fontsize=16)
ax.tick_params(axis='x', rotation=30, labelsize=16)
ax.tick_params(axis='y', labelsize=16)
plt.tight_layout()
plt.show()
8. MAX FLOW ANALYSIS
InΒ [Β ]:
# Pull the rows for the third entry of `parameters` (the max-flow series)
# out of the long-format table.
flow_max = transformed_data.loc[transformed_data['Parameter'].eq(parameters[2])]
flow_max.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 2 | Max FLow | 1995-01-01 | 0.235 |
| 7 | Max FLow | 1996-01-01 | 0.345 |
| 12 | Max FLow | 1997-01-01 | 0.223 |
| 17 | Max FLow | 1998-01-01 | 0.362 |
| 22 | Max FLow | 1999-01-01 | 0.159 |
InΒ [Β ]:
# The label column is constant after filtering, so discard it
flow_max = flow_max.drop('Parameter', axis=1)
InΒ [Β ]:
flow_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 2 | 1995-01-01 | 0.235 |
| 7 | 1996-01-01 | 0.345 |
| 12 | 1997-01-01 | 0.223 |
| 17 | 1998-01-01 | 0.362 |
| 22 | 1999-01-01 | 0.159 |
InΒ [Β ]:
# Put the observations in chronological order
flow_max = flow_max.sort_values('Date')
flow_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 2 | 1995-01-01 | 0.235 |
| 137 | 1995-02-01 | 0.223 |
| 272 | 1995-03-01 | 0.260 |
| 407 | 1995-04-01 | 0.569 |
| 542 | 1995-05-01 | 1.683 |
InΒ [Β ]:
flow_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 2 | 1995-01-01 | 0.235 |
| 137 | 1995-02-01 | 0.223 |
| 272 | 1995-03-01 | 0.260 |
| 407 | 1995-04-01 | 0.569 |
| 542 | 1995-05-01 | 1.683 |
InΒ [Β ]:
# Line chart of the whole max-flow record
fig, ax = plt.subplots(figsize=(20, 8))
flow_max.plot(x='Date', y='Value', kind='line', ax=ax, color='#ff6347', fontsize=12)
ax.set_xlabel('Date', fontdict={'fontsize':20})
ax.set_ylabel('Max Flow (m3/s)', fontdict={'fontsize':20})
plt.xticks(size=20)
plt.yticks(size=20)
ax.grid(linestyle='--')
fig.tight_layout()
# pandas adds a legend for the single series; replace it and drop it
ax.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run ``adfuller`` on `values` and print a short report.

    Prints the first four entries of the ``adfuller`` result next to their
    labels, then one verdict line: "Strong evidence ..." when the p-value
    (second entry) is at most 0.05, "Weak evidence ..." otherwise.
    Returns None.
    """
    result = adfuller(values)
    labels = ['ADF Test Statistic','p-value','#Lags Used', 'Number of Observations Used']
    # zip stops at the shorter sequence, so only the labelled entries print
    for label, value in zip(labels, result):
        print(' : '.join((label, str(value))))
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(flow_max['Value'])
ADF Test Statistic : -4.083392870423538 p-value : 0.001031585994254914 #Lags Used : 13 Number of Observations Used : 310 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two modelling columns and index the series by date
flow_max = flow_max[['Date', 'Value']].set_index('Date')
flow_max.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1995-01-01 | 0.235 |
| 1995-02-01 | 0.223 |
| 1995-03-01 | 0.260 |
| 1995-04-01 | 0.569 |
| 1995-05-01 | 1.683 |
InΒ [Β ]:
# Six fresh, independent lists: model names plus one list per metric
(
    flow_max_model_names,
    flow_max_mae_values,
    flow_max_rmse_values,
    flow_max_r2_values,
    flow_max_nse_values,
    flow_max_willmott_values,
) = ([] for _ in range(6))
TRAIN THE MODEL
InΒ [Β ]:
# Build the scaled, lagged design matrix for the max-flow series.
# `flow_max` is the date-indexed, single-column DataFrame prepared above.
N_LAGS = 6       # number of past time steps passed to series_to_supervised
ROLL_WINDOW = 3  # window passed to add_rolling_features

series = flow_max
values = series.values
values = add_rolling_features(values, window=ROLL_WINDOW)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the complete series. If
# walk_forward_validation holds out a test period, its mean/variance have
# already influenced the scaling (data leakage) - confirm and consider
# fitting on the training part only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
data = series_to_supervised(values, n_in=N_LAGS)
# Leading rows without a full feature set are dropped from the index:
# N_LAGS for the lags plus ROLL_WINDOW - 1, presumably removed by
# add_rolling_features (TODO confirm). Equals the former hard-coded 6+2.
data_df = pd.DataFrame(data, index=series.index[N_LAGS + ROLL_WINDOW - 1:])
InΒ [Β ]:
# Candidate models as (display name, forecast function) pairs
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear the shared result lists in case they contain previous values
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and remember its
# predictions. The earlier version re-ran the validation for the plot, which
# doubled the runtime and, for models fitted without a fixed seed, drew
# curves that differed from the predictions the metrics were computed on.
model_predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions[model_name] = yhat

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model.
# `test_index` and `y` come from the last validation run; every run uses the
# same `data_df`, so they are taken to be identical across models.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)
for model_name, yhat in model_predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)
plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Max Flow (m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.3 >expected=2.9, predicted=2.7 >expected=3.0, predicted=2.7 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.2, predicted=-0.3 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.9, predicted=0.6 >expected=0.3, predicted=1.2 >expected=0.7, predicted=1.4 >expected=0.8, predicted=0.9 >expected=-0.3, predicted=-0.4 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.8 >expected=2.9, predicted=2.6 >expected=3.0, predicted=2.5 >expected=-0.2, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.1 >expected=-0.2, predicted=-0.3 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.9, predicted=0.6 >expected=0.3, predicted=1.2 >expected=0.7, predicted=1.2 >expected=0.8, predicted=0.7 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, 
predicted=-0.4 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.8 >expected=2.9, predicted=2.6 >expected=3.0, predicted=2.5 >expected=-0.2, predicted=-0.1 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.1 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.1, predicted=-0.3 >expected=-0.7, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=0.1, predicted=-0.0 >expected=0.9, predicted=1.0 >expected=0.3, predicted=0.8 >expected=0.7, predicted=0.8 >expected=0.8, predicted=1.0 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.4 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.5 >expected=2.9, predicted=2.7 >expected=3.0, predicted=2.5 >expected=-0.2, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=0.0 >expected=-0.5, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.9, predicted=0.8 >expected=0.3, predicted=0.9 >expected=0.7, predicted=1.0 >expected=0.8, predicted=0.9 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 
>expected=3.0, predicted=2.3 >expected=2.9, predicted=2.7 >expected=3.0, predicted=2.7 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.2, predicted=-0.3 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.9, predicted=0.6 >expected=0.3, predicted=1.2 >expected=0.7, predicted=1.4 >expected=0.8, predicted=0.9 >expected=-0.3, predicted=-0.4 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.8 >expected=2.9, predicted=2.6 >expected=3.0, predicted=2.5 >expected=-0.2, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.1 >expected=-0.2, predicted=-0.3 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.9, predicted=0.6 >expected=0.3, predicted=1.2 >expected=0.7, predicted=1.2 >expected=0.8, predicted=0.7 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.4 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.8 >expected=2.9, 
predicted=2.6 >expected=3.0, predicted=2.5 >expected=-0.2, predicted=-0.1 >expected=-0.6, predicted=-0.4 >expected=-0.7, predicted=-0.5 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.7, predicted=-0.6 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.1 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.1, predicted=-0.3 >expected=-0.7, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=0.1, predicted=-0.0 >expected=0.9, predicted=1.0 >expected=0.3, predicted=0.8 >expected=0.7, predicted=0.8 >expected=0.8, predicted=1.0 >expected=-0.3, predicted=-0.2 >expected=-0.5, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=3.0, predicted=2.7 >expected=2.9, predicted=2.7 >expected=3.0, predicted=2.4 >expected=-0.2, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.7, predicted=-0.7 >expected=-0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.5, predicted=-0.1 >expected=-0.5, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.4 >expected=0.1, predicted=0.2 >expected=0.9, predicted=1.1 >expected=0.3, predicted=1.0 >expected=0.7, predicted=0.9 >expected=0.8, predicted=0.8 >expected=-0.3, predicted=-0.3 >expected=-0.5, predicted=-0.5
EVALUATE THE MODEL
InΒ [Β ]:
# Collect the per-model scores into one table, one row per model
metrics = pd.DataFrame(
    {
        'Model': model_names,
        'MAE': mae_values,
        'RMSE': rmse_values,
        'R-squared': r2_values,
        'Nash-Sutcliffe Efficiency': nse_values,
        'Willmott\'s Index of Agreement': willmott_values,
    }
)
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.147210 | 0.262236 | 0.935964 | 0.935964 | 0.982711 |
| 1 | XGBoost | 0.137944 | 0.233216 | 0.949353 | 0.949353 | 0.986443 |
| 2 | SVM | 0.142707 | 0.184150 | 0.968422 | 0.968422 | 0.991383 |
| 3 | Random Forest | 0.128732 | 0.208715 | 0.959436 | 0.959436 | 0.988968 |
InΒ [Β ]:
# Render the metrics as a figure-based table.
# Note: `metrics` itself is replaced by its 2-decimal rounding, so later
# cells that read `metrics` see the rounded values.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping;
# it replaces direct access to the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Max Flow')
plt.show()
InΒ [Β ]:
# Bar charts of the max-flow results: one figure per metric, one bar per model.
# `metrics` was rounded to 2 decimals by the table cell above, so the bars
# show the rounded values.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4
# Metrics and Titles: column name in `metrics` -> longer display title
metrics_title = {
'MAE': 'Mean Absolute Error (MAE)',
'RMSE': 'Root Mean Squared Error (RMSE)',
'R-squared': 'R-squared',
'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}
# Plot each metric separately
# NOTE(review): `title` is only used by the commented-out set_title call
# below, so the display titles are currently not shown anywhere.
for metric, title in metrics_title.items():
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
# ax.set_title(title, fontsize=16)
ax.set_ylabel(metric, fontsize=16)
ax.tick_params(axis='x', rotation=30, labelsize=16)
ax.tick_params(axis='y', labelsize=16)
plt.tight_layout()
plt.show()
9. MIN FLOW ANALYSIS
InΒ [Β ]:
# Pull the 'Min Flow' rows out of the long-format table
flow_min = transformed_data.loc[transformed_data['Parameter'].eq('Min Flow')]
InΒ [Β ]:
# The label column is constant after filtering, so discard it
flow_min = flow_min.drop('Parameter', axis=1)
InΒ [Β ]:
flow_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 3 | 1995-01-01 | 0.212 |
| 8 | 1996-01-01 | 0.070 |
| 13 | 1997-01-01 | 0.169 |
| 18 | 1998-01-01 | 0.274 |
| 23 | 1999-01-01 | 0.116 |
InΒ [Β ]:
# Put the observations in chronological order
flow_min = flow_min.sort_values('Date')
flow_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 3 | 1995-01-01 | 0.212 |
| 138 | 1995-02-01 | 0.223 |
| 273 | 1995-03-01 | 0.212 |
| 408 | 1995-04-01 | 0.200 |
| 543 | 1995-05-01 | 0.235 |
InΒ [Β ]:
flow_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 3 | 1995-01-01 | 0.212 |
| 138 | 1995-02-01 | 0.223 |
| 273 | 1995-03-01 | 0.212 |
| 408 | 1995-04-01 | 0.200 |
| 543 | 1995-05-01 | 0.235 |
InΒ [Β ]:
# Line chart of the whole min-flow record
fig, ax = plt.subplots(figsize=(20, 8))
flow_min.plot(x='Date', y='Value', kind='line', ax=ax, color='#8c564b', fontsize=12)
ax.set_xlabel('Date', fontdict={'fontsize':20})
ax.set_ylabel('Min Flow (m3/s)', fontdict={'fontsize':20})
plt.xticks(size=20)
plt.yticks(size=20)
ax.grid(linestyle='--')
fig.tight_layout()
# pandas adds a legend for the single series; replace it and drop it
ax.legend().remove()
plt.show()
InΒ [Β ]:
flow_min[flow_min.Value > 1]
Out[Β ]:
| Date | Value |
|---|
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run ``adfuller`` on `values` and print a short report.

    Prints the first four entries of the ``adfuller`` result next to their
    labels, then one verdict line: "Strong evidence ..." when the p-value
    (second entry) is at most 0.05, "Weak evidence ..." otherwise.
    Returns None.
    """
    result = adfuller(values)
    labels = ['ADF Test Statistic','p-value','#Lags Used', 'Number of Observations Used']
    # zip stops at the shorter sequence, so only the labelled entries print
    for label, value in zip(labels, result):
        print(' : '.join((label, str(value))))
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series has a unit root (it is non-stationary)
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(flow_min['Value'])
ADF Test Statistic : -2.3097085744112578 p-value : 0.16885587943215408 #Lags Used : 13 Number of Observations Used : 310 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two columns of interest and index the series by date.
flow_min = flow_min[['Date', 'Value']].set_index('Date')
flow_min.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1995-01-01 | 0.212 |
| 1995-02-01 | 0.223 |
| 1995-03-01 | 0.212 |
| 1995-04-01 | 0.200 |
| 1995-05-01 | 0.235 |
InΒ [Β ]:
# Six independent, empty result lists for the minimum-flow experiments.
# NOTE(review): the training cell of this section fills the shared
# `model_names` / `mae_values` / ... lists rather than these — confirm whether
# these are still needed.
(
    flow_min_model_names,
    flow_min_mae_values,
    flow_min_rmse_values,
    flow_min_r2_values,
    flow_min_nse_values,
    flow_min_willmott_values,
) = ([] for _ in range(6))
TRAIN THE MODEL¶
InΒ [Β ]:
# Turn the date-indexed minimum-flow series into a supervised-learning frame.
series = flow_min
# Rolling-window features (window of 3) are added, then the result is forced
# to a plain array via a DataFrame round trip.
values = pd.DataFrame(add_rolling_features(series.values, window=3)).values
# NOTE(review): the scaler is fitted on the complete series; if
# walk_forward_validation holds out a test period, its statistics leak into
# the scaling — confirm.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# NOTE: `data` was the raw Excel frame loaded at the top of the notebook; it
# is overwritten here.
data = series_to_supervised(values, n_in=6)
# The first 6+2 dates are dropped from the index — presumably 6 lag rows plus
# 2 rows consumed by the rolling window; verify against the helpers.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate models as (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# The metric lists are shared between sections, so empty them in place before
# recording this section's results.
for shared_list in (model_names, mae_values, rmse_values,
                    r2_values, nse_values, willmott_values):
    shared_list.clear()

# Run walk-forward validation exactly ONCE per model and keep its predictions.
# Previously every model was validated a second time just for the plot, which
# doubled the runtime and, for models that are not deterministic, plotted
# predictions that did not match the reported metrics.
predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions[model_name] = (test_index, yhat)

# Collect the metrics in a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values against the stored predictions of each model.
plt.figure(figsize=(16, 6))
# `test_index` and `y` come from the last validation run above.
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)
for model_name, (test_index, yhat) in predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)
plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Min Flow (m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-0.5 >expected=-0.4, predicted=-0.5 >expected=0.0, predicted=0.5 >expected=0.3, predicted=0.7 >expected=-0.1, predicted=0.0 >expected=0.9, predicted=0.4 >expected=0.3, predicted=0.4 >expected=-0.3, predicted=-0.3 >expected=-0.1, predicted=-0.3 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.5 >expected=1.4, predicted=0.8 >expected=1.0, predicted=0.9 >expected=1.3, predicted=0.5 >expected=-0.4, predicted=0.2 >expected=-0.5, predicted=-0.4 >expected=3.3, predicted=0.7 >expected=2.8, predicted=2.3 >expected=0.7, predicted=2.6 >expected=0.6, predicted=1.1 >expected=2.1, predicted=0.7 >expected=2.5, predicted=1.3 >expected=-0.1, predicted=-0.1 >expected=1.2, predicted=0.9 >expected=0.7, predicted=1.6 >expected=0.2, predicted=1.0 >expected=0.1, predicted=0.6 >expected=-0.3, predicted=0.0 >expected=-0.7, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=0.0, predicted=0.0 >expected=0.3, predicted=0.6 >expected=-0.1, predicted=0.2 >expected=0.9, predicted=0.5 >expected=0.3, predicted=-0.2 >expected=-0.3, predicted=-0.4 >expected=-0.1, predicted=-0.2 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.8 >expected=1.4, predicted=1.5 >expected=1.0, predicted=0.7 >expected=1.3, predicted=1.0 >expected=-0.4, predicted=-0.2 >expected=-0.5, predicted=-0.4 >expected=3.3, predicted=1.1 >expected=2.8, predicted=2.3 >expected=0.7, predicted=2.9 >expected=0.6, predicted=0.3 >expected=2.1, predicted=1.0 >expected=2.5, predicted=0.4 >expected=-0.1, predicted=-0.3 >expected=1.2, predicted=0.8 >expected=0.7, predicted=0.4 >expected=0.2, predicted=0.2 >expected=0.1, predicted=0.8 >expected=-0.3, predicted=-0.3 >expected=-0.7, predicted=-0.5 >expected=-0.4, predicted=-0.3 >expected=0.0, predicted=0.1 
>expected=0.3, predicted=0.5 >expected=-0.1, predicted=0.1 >expected=0.9, predicted=-0.1 >expected=0.3, predicted=0.1 >expected=-0.3, predicted=-0.7 >expected=-0.1, predicted=-0.4 >expected=-0.2, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.5 >expected=1.4, predicted=1.2 >expected=1.0, predicted=0.1 >expected=1.3, predicted=0.7 >expected=-0.4, predicted=0.2 >expected=-0.5, predicted=-0.3 >expected=3.3, predicted=1.0 >expected=2.8, predicted=1.6 >expected=0.7, predicted=1.5 >expected=0.6, predicted=1.1 >expected=2.1, predicted=1.0 >expected=2.5, predicted=1.4 >expected=-0.1, predicted=0.2 >expected=1.2, predicted=-0.0 >expected=0.7, predicted=0.9 >expected=0.2, predicted=0.8 >expected=0.1, predicted=-0.0 >expected=-0.3, predicted=-0.2 >expected=-0.7, predicted=-0.3 >expected=-0.4, predicted=-0.4 >expected=0.0, predicted=0.0 >expected=0.3, predicted=0.3 >expected=-0.1, predicted=0.1 >expected=0.9, predicted=0.3 >expected=0.3, predicted=-0.1 >expected=-0.3, predicted=-0.2 >expected=-0.1, predicted=-0.3 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=1.1, predicted=0.6 >expected=1.4, predicted=0.9 >expected=1.0, predicted=0.8 >expected=1.3, predicted=0.9 >expected=-0.4, predicted=-0.1 >expected=-0.5, predicted=-0.5 >expected=3.3, predicted=0.9 >expected=2.8, predicted=3.0 >expected=0.7, predicted=3.1 >expected=0.6, predicted=2.0 >expected=2.1, predicted=1.0 >expected=2.5, predicted=1.3 >expected=-0.1, predicted=-0.1 >expected=1.2, predicted=1.2 >expected=0.7, predicted=1.1 >expected=0.2, predicted=0.6 >expected=0.1, predicted=0.9 >expected=-0.3, predicted=-0.1 >expected=-0.7, predicted=-0.5 >expected=-0.4, predicted=-0.5 >expected=0.0, predicted=0.5 >expected=0.3, predicted=0.7 >expected=-0.1, predicted=0.0 >expected=0.9, predicted=0.4 
>expected=0.3, predicted=0.4 >expected=-0.3, predicted=-0.3 >expected=-0.1, predicted=-0.3 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.5 >expected=1.4, predicted=0.8 >expected=1.0, predicted=0.9 >expected=1.3, predicted=0.5 >expected=-0.4, predicted=0.2 >expected=-0.5, predicted=-0.4 >expected=3.3, predicted=0.7 >expected=2.8, predicted=2.3 >expected=0.7, predicted=2.6 >expected=0.6, predicted=1.1 >expected=2.1, predicted=0.7 >expected=2.5, predicted=1.3 >expected=-0.1, predicted=-0.1 >expected=1.2, predicted=0.9 >expected=0.7, predicted=1.6 >expected=0.2, predicted=1.0 >expected=0.1, predicted=0.6 >expected=-0.3, predicted=0.0 >expected=-0.7, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=0.0, predicted=0.0 >expected=0.3, predicted=0.6 >expected=-0.1, predicted=0.2 >expected=0.9, predicted=0.5 >expected=0.3, predicted=-0.2 >expected=-0.3, predicted=-0.4 >expected=-0.1, predicted=-0.2 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.8 >expected=1.4, predicted=1.5 >expected=1.0, predicted=0.7 >expected=1.3, predicted=1.0 >expected=-0.4, predicted=-0.2 >expected=-0.5, predicted=-0.4 >expected=3.3, predicted=1.1 >expected=2.8, predicted=2.3 >expected=0.7, predicted=2.9 >expected=0.6, predicted=0.3 >expected=2.1, predicted=1.0 >expected=2.5, predicted=0.4 >expected=-0.1, predicted=-0.3 >expected=1.2, predicted=0.8 >expected=0.7, predicted=0.4 >expected=0.2, predicted=0.2 >expected=0.1, predicted=0.8 >expected=-0.3, predicted=-0.3 >expected=-0.7, predicted=-0.5 >expected=-0.4, predicted=-0.3 >expected=0.0, predicted=0.1 >expected=0.3, predicted=0.5 >expected=-0.1, predicted=0.1 >expected=0.9, predicted=-0.1 >expected=0.3, predicted=0.1 >expected=-0.3, predicted=-0.7 >expected=-0.1, 
predicted=-0.4 >expected=-0.2, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=1.1, predicted=0.5 >expected=1.4, predicted=1.2 >expected=1.0, predicted=0.1 >expected=1.3, predicted=0.7 >expected=-0.4, predicted=0.2 >expected=-0.5, predicted=-0.3 >expected=3.3, predicted=1.0 >expected=2.8, predicted=1.6 >expected=0.7, predicted=1.5 >expected=0.6, predicted=1.1 >expected=2.1, predicted=1.0 >expected=2.5, predicted=1.4 >expected=-0.1, predicted=0.2 >expected=1.2, predicted=-0.0 >expected=0.7, predicted=0.9 >expected=0.2, predicted=0.8 >expected=0.1, predicted=-0.0 >expected=-0.3, predicted=-0.2 >expected=-0.7, predicted=-0.4 >expected=-0.4, predicted=-0.2 >expected=0.0, predicted=-0.0 >expected=0.3, predicted=0.3 >expected=-0.1, predicted=-0.0 >expected=0.9, predicted=0.1 >expected=0.3, predicted=-0.0 >expected=-0.3, predicted=-0.2 >expected=-0.1, predicted=-0.3 >expected=-0.2, predicted=-0.3 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=1.1, predicted=0.6 >expected=1.4, predicted=0.9 >expected=1.0, predicted=0.4 >expected=1.3, predicted=0.6 >expected=-0.4, predicted=0.1 >expected=-0.5, predicted=-0.5 >expected=3.3, predicted=0.8 >expected=2.8, predicted=2.5 >expected=0.7, predicted=2.7 >expected=0.6, predicted=1.4 >expected=2.1, predicted=1.0 >expected=2.5, predicted=1.1 >expected=-0.1, predicted=0.1 >expected=1.2, predicted=0.9 >expected=0.7, predicted=1.2 >expected=0.2, predicted=0.4 >expected=0.1, predicted=0.7 >expected=-0.3, predicted=0.1
EVALUATE THE MODEL¶
InΒ [Β ]:
# One row per model, one column per evaluation metric, built from the shared
# result lists.
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.510560 | 0.764473 | 0.494032 | 0.494032 | 0.814135 |
| 1 | XGBoost | 0.422930 | 0.735053 | 0.532226 | 0.532226 | 0.836220 |
| 2 | SVM | 0.500558 | 0.709194 | 0.564559 | 0.564559 | 0.831898 |
| 3 | Random Forest | 0.466310 | 0.770881 | 0.485514 | 0.485514 | 0.833792 |
InΒ [Β ]:
# Render the metrics as a table figure.
# NOTE: `metrics` is rebound to its 2-decimal version, so later cells that
# read `metrics` see the rounded values.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')  # only the table should be visible, not the axes frame
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping; the
# private `_cells` attribute is no longer touched.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels and column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Min Flow')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Column name in `metrics` -> long, human-readable title for that metric.
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one stand-alone bar chart per metric, one bar per model.
# The long title is currently unused: the y-axis carries the short metric name.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
    ax.set_ylabel(metric, fontsize=16)
    for axis_name in ('x', 'y'):
        if axis_name == 'x':
            # Model names are long, so tilt them to avoid overlap.
            ax.tick_params(axis='x', rotation=30, labelsize=16)
        else:
            ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
10. RUNOFF (m3/s) ANALYSIS¶
InΒ [Β ]:
# Select the runoff rows (the parameter stored at index 4 of `parameters`)
# from the long-format table and preview them.
is_runoff = transformed_data['Parameter'] == parameters[4]
runoff = transformed_data[is_runoff]
runoff.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 4 | Runoff | 1995-01-01 | 1.658000 |
| 9 | Runoff | 1996-01-01 | 1.095387 |
| 14 | Runoff | 1997-01-01 | 1.856000 |
| 19 | Runoff | 1998-01-01 | 1.930000 |
| 24 | Runoff | 1999-01-01 | 9.392000 |
InΒ [Β ]:
# Every remaining row has the same 'Parameter' value, so the column is dropped.
runoff = runoff.drop('Parameter', axis=1)
InΒ [Β ]:
runoff.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 4 | 1995-01-01 | 1.658000 |
| 9 | 1996-01-01 | 1.095387 |
| 14 | 1997-01-01 | 1.856000 |
| 19 | 1998-01-01 | 1.930000 |
| 24 | 1999-01-01 | 9.392000 |
InΒ [Β ]:
# Put the observations in chronological order, then preview the first rows.
runoff = runoff.sort_values(by=['Date'])
runoff.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 4 | 1995-01-01 | 1.658 |
| 139 | 1995-02-01 | 1.158 |
| 274 | 1995-03-01 | 1.861 |
| 409 | 1995-04-01 | 1.554 |
| 544 | 1995-05-01 | 2.298 |
InΒ [Β ]:
runoff.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 4 | 1995-01-01 | 1.658 |
| 139 | 1995-02-01 | 1.158 |
| 274 | 1995-03-01 | 1.861 |
| 409 | 1995-04-01 | 1.554 |
| 544 | 1995-05-01 | 2.298 |
InΒ [Β ]:
# Line plot of the runoff series over the whole record.
# NOTE(review): the station filter at the top of the notebook names this
# quantity "Runoff (mm)"; confirm that m3/s is the right unit for the label.
fig, ax = plt.subplots(figsize=(20, 8))
runoff.plot(x='Date', y='Value', kind='line', ax=ax, color='#ff7f0e', fontsize=12)
ax.set_xlabel('Date', fontdict={'fontsize': 20})
ax.set_ylabel('Runoff (m3/s)', fontdict={'fontsize': 20})
# These override the fontsize=12 tick labels set by DataFrame.plot above.
plt.xticks(size=20)
plt.yticks(size=20)
ax.grid(linestyle='--')
fig.tight_layout()
# Single series: the legend adds nothing, so it is removed.
ax.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller (ADF) test to check the stationarity of a series
def adfuller_test(values, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``values`` and print a summary.

    The null hypothesis (H0) of the ADF test is that the series has a unit
    root, i.e. it is non-stationary. A p-value at or below ``alpha`` is
    reported as strong evidence against H0.

    Parameters
    ----------
    values : array-like
        The observations, passed unchanged to ``adfuller``.
    alpha : float, optional
        Significance level for the verdict line. The default of 0.05 keeps
        the threshold that was previously hard-coded.

    Returns
    -------
    None
        The statistics and the verdict are printed, not returned.
    """
    result = adfuller(values)
    labels = ['ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    # zip() stops at the shorter sequence, so only the first four entries of
    # `result` are printed.
    for value, label in zip(result, labels):
        print(f'{label} : {value!s}')
    # result[1] is the entry labelled 'p-value' above.
    if result[1] <= alpha:
        print("Strong evidence against the null hypothesis(H0)")
    else:
        print("Weak evidence against the null hypothesis(H0)")
InΒ [Β ]:
# H0: the series has a unit root (it is non-stationary)
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(runoff['Value'])
ADF Test Statistic : -4.186222397590971 p-value : 0.0006951911057647956 #Lags Used : 13 Number of Observations Used : 310 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two columns of interest and index the series by date.
runoff = runoff[['Date', 'Value']].set_index('Date')
runoff.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1995-01-01 | 1.658 |
| 1995-02-01 | 1.158 |
| 1995-03-01 | 1.861 |
| 1995-04-01 | 1.554 |
| 1995-05-01 | 2.298 |
InΒ [Β ]:
# Six independent, empty result lists for the runoff experiments.
# NOTE(review): the training cell of this section fills the shared
# `model_names` / `mae_values` / ... lists rather than these — confirm whether
# these are still needed.
(
    runoff_model_names,
    runoff_mae_values,
    runoff_rmse_values,
    runoff_r2_values,
    runoff_nse_values,
    runoff_willmott_values,
) = ([] for _ in range(6))
TRAIN THE MODEL¶
InΒ [Β ]:
# Turn the date-indexed runoff series into a supervised-learning frame.
series = runoff
# Rolling-window features (window of 3) are added, then the result is forced
# to a plain array via a DataFrame round trip.
values = pd.DataFrame(add_rolling_features(series.values, window=3)).values
# NOTE(review): the scaler is fitted on the complete series; if
# walk_forward_validation holds out a test period, its statistics leak into
# the scaling — confirm.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# NOTE: `data` was the raw Excel frame loaded at the top of the notebook; it
# is overwritten here.
data = series_to_supervised(values, n_in=6)
# The first 6+2 dates are dropped from the index — presumably 6 lag rows plus
# 2 rows consumed by the rolling window; verify against the helpers.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate models as (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# The metric lists are shared between sections, so empty them in place before
# recording this section's results.
for shared_list in (model_names, mae_values, rmse_values,
                    r2_values, nse_values, willmott_values):
    shared_list.clear()

# Run walk-forward validation exactly ONCE per model and keep its predictions.
# Previously every model was validated a second time just for the plot, which
# doubled the runtime and, for models that are not deterministic, plotted
# predictions that did not match the reported metrics.
predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions[model_name] = (test_index, yhat)

# Collect the metrics in a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values against the stored predictions of each model.
plt.figure(figsize=(16, 6))
# `test_index` and `y` come from the last validation run above.
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)
for model_name, (test_index, yhat) in predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)
plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Runoff (m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.5, predicted=-0.4 >expected=0.1, predicted=-0.0 >expected=0.6, predicted=0.7 >expected=0.4, predicted=-0.0 >expected=0.5, predicted=0.3 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.8 >expected=-0.6, predicted=-0.1 >expected=-0.2, predicted=-0.6 >expected=-0.3, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.9, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-0.3, predicted=-0.3 >expected=-0.4, predicted=-0.3 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.7, predicted=-0.8 >expected=-0.5, predicted=-0.2 >expected=0.1, predicted=-0.1 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.0 >expected=0.5, predicted=0.2 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.5 >expected=-0.6, predicted=-0.4 >expected=-0.2, predicted=-0.5 >expected=-0.3, predicted=-0.3 >expected=-0.9, predicted=-0.5 >expected=-0.9, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=0.2 >expected=-0.4, predicted=-0.2 >expected=-0.4, predicted=-0.2 >expected=-0.8, predicted=-0.7 >expected=-0.7, predicted=-0.8 >expected=-0.5, predicted=-0.5 
>expected=0.1, predicted=0.1 >expected=0.6, predicted=0.8 >expected=0.4, predicted=0.3 >expected=0.5, predicted=0.2 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.8 >expected=-0.6, predicted=-0.2 >expected=-0.2, predicted=-0.8 >expected=-0.3, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.4, predicted=-0.6 >expected=-0.8, predicted=-0.6 >expected=-0.7, predicted=-0.9 >expected=-0.5, predicted=-0.4 >expected=0.1, predicted=0.0 >expected=0.6, predicted=0.5 >expected=0.4, predicted=0.2 >expected=0.5, predicted=0.2 >expected=0.7, predicted=0.4 >expected=1.0, predicted=0.6 >expected=-0.6, predicted=-0.4 >expected=-0.2, predicted=-0.4 >expected=-0.3, predicted=-0.5 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.9 >expected=-0.6, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=0.1 >expected=-0.4, predicted=-0.2 >expected=-0.4, predicted=-0.4 >expected=-0.8, predicted=-0.5 >expected=-0.7, predicted=-0.7 >expected=-0.5, predicted=-0.4 >expected=0.1, predicted=-0.0 
>expected=0.6, predicted=0.7 >expected=0.4, predicted=-0.0 >expected=0.5, predicted=0.3 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.8 >expected=-0.6, predicted=-0.1 >expected=-0.2, predicted=-0.6 >expected=-0.3, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.5 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.9, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-0.3, predicted=-0.3 >expected=-0.4, predicted=-0.3 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.5 >expected=-0.7, predicted=-0.8 >expected=-0.5, predicted=-0.2 >expected=0.1, predicted=-0.1 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.0 >expected=0.5, predicted=0.2 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.5 >expected=-0.6, predicted=-0.4 >expected=-0.2, predicted=-0.5 >expected=-0.3, predicted=-0.3 >expected=-0.9, predicted=-0.5 >expected=-0.9, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=0.2 >expected=-0.4, predicted=-0.2 >expected=-0.4, predicted=-0.2 >expected=-0.8, predicted=-0.7 >expected=-0.7, predicted=-0.8 >expected=-0.5, predicted=-0.5 >expected=0.1, predicted=0.1 >expected=0.6, predicted=0.8 
>expected=0.4, predicted=0.3 >expected=0.5, predicted=0.2 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.8 >expected=-0.6, predicted=-0.2 >expected=-0.2, predicted=-0.8 >expected=-0.3, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.4, predicted=-0.6 >expected=-0.8, predicted=-0.6 >expected=-0.7, predicted=-0.9 >expected=-0.5, predicted=-0.4 >expected=0.1, predicted=-0.1 >expected=0.6, predicted=0.5 >expected=0.4, predicted=0.1 >expected=0.5, predicted=0.1 >expected=0.7, predicted=0.5 >expected=1.0, predicted=0.7 >expected=-0.6, predicted=-0.3 >expected=-0.2, predicted=-0.4 >expected=-0.3, predicted=-0.5 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.9, predicted=-0.8 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.8 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.6 >expected=-0.3, predicted=-0.0 >expected=-0.4, predicted=-0.3 >expected=-0.4, predicted=-0.3 >expected=-0.8, predicted=-0.5 >expected=-0.7, predicted=-0.7
EVALUATE THE MODEL¶
InΒ [Β ]:
# One row per model, one column per evaluation metric, built from the shared
# result lists.
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.171717 | 0.204025 | 0.848730 | 0.848730 | 0.952940 |
| 1 | XGBoost | 0.156541 | 0.212774 | 0.835479 | 0.835479 | 0.949176 |
| 2 | SVM | 0.133483 | 0.184586 | 0.876182 | 0.876182 | 0.965221 |
| 3 | Random Forest | 0.131083 | 0.173587 | 0.890499 | 0.890499 | 0.966709 |
InΒ [Β ]:
# Render the metrics as a table figure.
# NOTE: `metrics` is rebound to its 2-decimal version, so later cells that
# read `metrics` see the rounded values.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')  # only the table should be visible, not the axes frame
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping; the
# private `_cells` attribute is no longer touched.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels and column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Runoff (m3/s)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Column name in `metrics` -> long, human-readable title for that metric.
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one stand-alone bar chart per metric, one bar per model.
# The long title is currently unused: the y-axis carries the short metric name.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
    ax.set_ylabel(metric, fontsize=16)
    for axis_name in ('x', 'y'):
        if axis_name == 'x':
            # Model names are long, so tilt them to avoid overlap.
            ax.tick_params(axis='x', rotation=30, labelsize=16)
        else:
            ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()